The required R packages for this analysis are:
##essential
library(tidyverse) # set of tidy packages (readr, tibble, dplyr, tidyr, ggplot2, ¿purr?)
library(tidyxl) # read untidy excel formats
library(readxl) # read excel files as tidy tables
library(drc) # fit dose-response models
library(mixtools) # analyze finete mixture models
##accesory
library(DiagrammeR) # create flowchart
theme_set(theme_bw())
DiagrammeR6#install.packages("DiagrammeR")
#library(DiagrammeR)
DiagrammeR("
graph LR
A[XLS data] -.-> |readxl+tidyxl| B{R data}
Z[CSV data] -.-> |plater| B{R data}
B --> C1[STD]
B --> E[ctr +/-]
B --> D1[UNK]
C1 -.-> |drc| C2[4pLL model]
C2 --> C3[Box-Cox]
C3 --> F{UNK Ab.units}
D1 --> D2[mean.OD]
D2 --> D3[OD %CV]
D3 --> F
F --> G1[Histogram]
F --> G2[Density]
F --> G3[QQPlot]
style Z fill:#ffffff, stroke:#000000, stroke-width:2px
style A fill:#ffffff, stroke:#000000, stroke-width:2px
style B fill:#ffffff, stroke:#000000, stroke-width:2px
style C1 fill:#ffffff, stroke:#000000, stroke-width:2px
style C2 fill:#ffffff, stroke:#000000, stroke-width:2px
style C3 fill:#ffffff, stroke:#000000, stroke-width:2px
style D1 fill:#ffffff, stroke:#000000, stroke-width:2px
style D2 fill:#ffffff, stroke:#000000, stroke-width:2px
style D3 fill:#ffffff, stroke:#000000, stroke-width:2px
style E fill:#ffffff, stroke:#000000, stroke-width:2px
style F fill:#ffffff, stroke:#000000, stroke-width:2px
style G1 fill:#ffffff, stroke:#000000, stroke-width:2px
style G2 fill:#ffffff, stroke:#000000, stroke-width:2px
style G3 fill:#ffffff, stroke:#000000, stroke-width:2px
")
The curve of the log-logistic symetric model describe the response f(x) dependent of the dose x and 04 parameters: \[ f(x)=f(x;b,c,d,e)=c+\frac{d-c}{1+\exp[b(log(x)-log(e))]}\ \] where:
c is the lower limit of the response when the dose x approaches infinity,d is the upper limit when the dose x approaches zero,b is the slope around the point of inflection, represented bye defined as effective dose and commmonly denoted as5:
ED50, EC50 or IC50 for continuous responses,LD50 or LC50 for binomial responses, and12 summary plots per ELISA Template:
getwd()
[1] "/home/avallec/Documents/Valle_GnB/0projects/R_/elixr"
#x <- tidyxl::tidy_xlsx("data-raw/raw/unap-tesis/RAFAEL-data/TEMPLATES Rafael.xlsx")$data$`TEMPLATE ELISA N°1`
x <- tidyxl::tidy_xlsx("data-raw/raw/unap-tesis/RAFAEL-data/TEMPLATES Rafael.xlsx")$data
#str(x)
y <- x[[3]] %>% #i
filter(row %in% 23:29,
col %in% 2:13) %>%
dplyr::select(address,row,col,numeric,character,local_format_id) %>%
unite(ID,c("numeric","character")) %>%
mutate(ID= stringr::str_replace(ID,"_NA|NA_", ""),
Plate= paste0("N",3)) %>% #i
dplyr::select(Plate,everything()) %>%
filter(!ID %in% c("C+","C-","NA","Blank")) #%>% group_by(ID) %>% slice(1) %>% ungroup()
y <- y[FALSE,]
#str(y)
for(i in 1:length(x)){
a <- x[[i]] %>% #i
filter(row %in% 23:29,
col %in% 2:13) %>%
dplyr::select(address,row,col,numeric,character,local_format_id) %>%
unite(ID,c("numeric","character")) %>%
mutate(ID= stringr::str_replace(ID,"_NA|NA_", ""),
Plate= paste0("N",i)) %>% #i
dplyr::select(Plate,everything()) %>%
filter(!ID %in% c("C+","C-","NA","Blank")) #%>% group_by(ID) %>% slice(1) %>% ungroup()
y <- union(y,a)
}
y <- y %>% arrange(Plate,row,col)
#y %>% dplyr::count(Plate)
#std.raw
y <- y %>%
#dplyr::count(ID) %>% dplyr::arrange(desc(n))
#filter(ID==2235)
#filter(ID==1856)
#filter(ID==3942)
mutate(pheno=ifelse(local_format_id %in% c(23,17,20,18,21,19,22),"asymptomatic",
ifelse(local_format_id %in% c(68,69,70,71,72,73,24),"symptomatic",
NA_character_)
)
) #%>%
#filter(ID==2235)
#filter(pheno=="asymptomatic") #%>% dplyr::select(ID)
#mutate(pheno=ifelse(ID %in% . %>% filter(pheno=="asymptomatic") %>% dplyr::select(ID),"asym","sym"))
w <- y %>% filter(pheno=="asymptomatic") %>% dplyr::select(ID)
phe <- y %>%
mutate(pheno= ifelse(ID %in% w$ID,"asymptomatic","symptomatic")) %>% # RESPETA pheno de PLACA 1 y 2 !
mutate(igg=ifelse(local_format_id %in% c(85),"igg1",
ifelse(local_format_id %in% c(87),"igg2",
ifelse(local_format_id %in% c(88,25),"igg3",
ifelse(local_format_id %in% c(90),"igg4",
"igg")
)
)
)
) %>% #group_by(ID,igg) %>% slice(1) %>% ungroup() %>%
dplyr::select(-local_format_id#,-address,-row,-col
) %>%
arrange(Plate,row,col) %>%
mutate(Plate= as.factor(Plate)) %>%
dplyr::select(-address,-starts_with("row"),-col#,-loc
) %>%
mutate(pheno=ifelse(Plate=="N2" |
Plate=="N6" |
Plate=="N7",
"symptomatic",pheno)) # RESPETAR pheno POR PLACA (OJO: más de un pheno por paciente)
#filter(ID=="3053") # asympt in template 6 y 7
#dplyr::count(pheno,igg)
#dplyr::count(igg)
#phe# %>% dplyr::count(Plate)
#phe %>% group_by(Plate) %>% slice(1)
#phe %>% dplyr::count(Plate,pheno,igg) %>% arrange(Plate)
wb_sheet <- readxl::excel_sheets("data-raw/raw/unap-tesis/RAFAEL-data/TEMPLATES Rafael.xlsx")
all <- data_frame(ID=as.character(),
OD=as.double(),
Plate=as.character(),
Type=as.character(),
Ab.unit=as.double(),
order=as.integer()
)
#str(all)
# 2 GENERATE R DATA.FRAME
for (j in 1:length(wb_sheet)) {
wb_Pf_main <- readxl::read_xlsx("data-raw/raw/unap-tesis/RAFAEL-data/TEMPLATES Rafael.xlsx",
range = "A21:M29",
sheet = j)#j
wb_Pf <- readxl::read_xlsx("data-raw/raw/unap-tesis/RAFAEL-data/TEMPLATES Rafael.xlsx",
range = "A35:M43",
sheet = j)#j
all_p <- wb_Pf_main %>%
dplyr::rename(row="X__1") %>%
gather(loc,ID,-row) %>%
mutate(loc=as.numeric(loc)) %>%
arrange(row,loc) %>%
mutate(ID= ifelse(row == "H" & loc == 9, "Blank", ID)) %>% #ANOTACION AUSCENTE EN TEMPLATES
full_join(
wb_Pf %>%
dplyr::rename(row="X__1") %>%
gather(loc,OD,-row) %>%
mutate(loc=as.numeric(loc)) %>%
arrange(row,loc)
) %>%
mutate(Plate=paste0("N",j),#j
Type=ifelse(row=="A" | ID=="Blank","std",
ifelse(ID=="C+" | ID=="C-","ctr","unk")),
Ab.unit=ifelse(row=="A",stringr::str_replace(ID,"STD 1/(.+)","\\1"),
ifelse(ID=="Blank","0",NA_character_))
) %>%
mutate(Ab.unit=as.numeric(Ab.unit)) %>%
mutate(Ab.unit=ifelse(row=="A",max(Ab.unit,na.rm=T)/Ab.unit,Ab.unit)) %>% #select(-row,-loc)
replace_na(list(ID = "na")) %>%
filter(ID!="na") %>%
mutate(order=seq(1,dim(.)[1])) %>%
dplyr::select(#-address,
-starts_with("row"),#-col,
-loc)
all <- union(all,all_p)
}
all <- all %>% arrange(Plate,order)
#all %>% dplyr::count(Type)
fin <- data_frame(Plate=as.character(),
order=as.integer(),
ID=as.character(),
Type=as.character(),
Ab.unit=as.double(),
OD=as.double(),
pheno=as.character(),
igg=as.character()
)
#str(fin)
for (j in 1:length(wb_sheet)) {
fin_p <- full_join(phe %>%
filter(Plate==levels(phe$Plate)[j]) %>% #requires j
mutate(order=seq(13,12+dim(.)[1])),
all %>%
filter(Plate==levels(phe$Plate)[j]) %>% #requires j
filter(Type=="unk") %>%
dplyr::select(-Plate,-ID),
by="order") %>%
dplyr::select(Plate,order,ID,Type,Ab.unit,OD,pheno,igg)
fin <- union(fin,fin_p)
}
fin <- fin %>% arrange(Plate,order)
#fin #%>% filter(ID==3053)
#OJO!!!! MALA ANOTACIÓN
#fin %>% filter(Plate=="N2") %>% dplyr::count(pheno)
#fin %>% filter(Plate=="N2") %>% filter(pheno=="asymptomatic")
#fin %>% dplyr::count(Plate)
end <- fin %>%
union(all %>%
filter(Type!="unk") %>%
dplyr::select(Plate,ID,Type,Ab.unit,OD,order) %>%
mutate(pheno=NA_character_,
igg=NA_character_)
) %>%
arrange(Plate,order) %>%
dplyr::select(-order)
#end #%>% filter(ID==2235)
# mod is mean od (plus sd and cv)
mod <- end %>%
filter(Type=="unk") %>%
group_by(Plate,igg,ID) %>%
summarise_at(vars(OD),c("mean","sd")) %>%
ungroup() %>%
dplyr::rename(mean.OD="mean",
sd.OD="sd") %>%
mutate(cv.OD=100*sd.OD/mean.OD) %>%
mutate(order=seq(1,dim(.)[1]))
#filter(ID=="1570")
mab <- end %>%
filter(Type=="unk") %>%
group_by(Plate,igg,ID) %>%
slice(1) %>%
ungroup() %>% #filter(ID=="1570")
mutate(order=seq(1,dim(.)[1])) %>%
full_join(mod %>% dplyr::select(order,mean.OD,sd.OD,cv.OD),
by="order") %>% #mutate(test.id= ID.x==ID.y) %>% dplyr::count(test.id)
dplyr::select(-order,-OD) %>%
mutate(ord=seq(1,dim(.)[1])) %>%
unite(code,ID,ord,sep="_",remove = F)
#mascara
blk <- end %>%
filter(ID=="Blank") %>%
group_by(Plate) %>% slice(1) %>% ungroup() %>%
dplyr::select(-OD)
#media por par de blancos por placa
std <- end %>%
filter(ID=="Blank") %>%
group_by(Plate) %>%
summarise_at(vars(OD),mean,na.rm=T) %>%
ungroup() %>%
#dplyr::rename(mean.OD="OD") %>%
full_join(blk,by="Plate") %>%
dplyr::select(Plate,ID,Type,Ab.unit,OD,everything()) %>%
union(end %>% filter(Type=="std" & ID!="Blank")) %>%
arrange(Plate,Ab.unit) %>%
mutate(Plate=as.factor(Plate))
mab_ir <- NULL
mod_bx <- NULL
#
# new dose levels as support for the line
#mdo$Ab.units %>% summary()
new_x <- expand.grid(exp(seq(log(0.1),log(2048),length=100)))
# db to add predictions of all plates
new <- data_frame(ord=as.character(),
resp=as.double(),
p=as.double(),
pmin=as.double(),
pmax=as.double(),
Plate=as.character())
#
for (j in 1:length(levels(phe$Plate))) {
#
# 5 PARAMETER ESTIMATION 4pLL model
#
wb.m1 <- drm(OD ~ Ab.unit, Plate,
data= std %>% filter(Plate==levels(phe$Plate)[j]),#j
#data= std,
fct = LL.4(names = c("b", "c", "d", "e")))
#
wb.model <- wb.m1
# 6 BOX-COX TRANSFORMATION against RESIDUAL heterogeneity
wb.model.BX <- boxcox(wb.model,
main=expression("Optimal " ~ lambda ~ " with confidence intervals"),
plotit = FALSE)
#coefficients(wb.model.BX) %>% matrix(7,4)
mab_p <- mab %>% as.data.frame()
# 7 UNK AB.UNITS ESTIMATION by INVERSE REGRESSION
mir <- ED(wb.model.BX,
mab_p[mab_p$Plate==levels(phe$Plate)[j],"mean.OD"],#j
#wb_MEAN[1:n,5],
type = "absolute",interval = "delta",
#clevel = "Pfal",
display = FALSE)
mab_ir <- rbind(mab_ir,mir)
mod_bx <- rbind(mod_bx,coefficients(wb.model.BX))
#
# predictions and confidence intervals
pdm <- predict(wb.model.BX, newdata = new_x, interval = "confidence")
# new data with predictions
new_p <- bind_cols(new_x %>%
as.tibble() %>%
rownames_to_column(var = "ord")
, pdm %>%
as.tibble() %>%
rownames_to_column(var = "ord")
) %>%
dplyr::select(-ord1) %>%
mutate(Plate=levels(phe$Plate)[j]) %>%
dplyr::rename(resp=Var1,p=Prediction,pmin=Lower,pmax=Upper)
new <- union(new,new_p)
#
}
Error in optim(startVec, opfct, hessian = TRUE, method = optMethod, control = list(maxit = maxIt, :
non-finite finite-difference value [2]
Error in optim(startVec, opfct, hessian = TRUE, method = optMethod, control = list(maxit = maxIt, :
non-finite finite-difference value [2]
Error in optim(startVec, opfct, hessian = TRUE, method = optMethod, control = list(maxit = maxIt, :
non-finite finite-difference value [2]
Error in optim(startVec, opfct, hessian = TRUE, method = optMethod, control = list(maxit = maxIt, :
non-finite finite-difference value [2]
Error in optim(startVec, opfct, hessian = TRUE, method = optMethod, control = list(maxit = maxIt, :
non-finite finite-difference value [2]
Error in optim(startVec, opfct, hessian = TRUE, method = optMethod, control = list(maxit = maxIt, :
non-finite finite-difference value [2]
#mab
#mab[mab$Plate==levels(phe$Plate)[1],] #%>% duplicated() %>% sum()
# 7.1 FEED UNK AB.UNITS DATA.FRAME
mdo <- mab_ir %>% as.data.frame() %>% rownames_to_column() %>%
#dplyr::rename(ord=rowname) %>%
separate(rowname,c("par","Plate","mean.OD.c"),sep = ":") %>%
rownames_to_column("ord") %>%
#mutate(ord=seq(1,dim(.)[1])) %>%
full_join(mab %>%
mutate(ord=as.character(ord)) %>%
mutate(mean.OD.c=as.character(mean.OD))
,
by = c("ord","Plate","mean.OD.c")) %>%
dplyr::select(Plate,ord,ID,code,Type,pheno,igg,mean.OD,sd.OD,cv.OD,Ab.units=Estimate,
everything(),-par,-mean.OD.c,-Ab.unit) %>%
filter(!code=="2235_137") # MANUAL FILTERING of replicate on different templates
mod_bt <- mod_bx %>% as.data.frame() %>% rownames_to_column() %>% as.tibble() %>%
dplyr::rename(Plate=rowname) %>%
mutate(Plate=stringr::str_replace(Plate,"(\\d)","N\\1"))
new <- new %>% mutate(ord=as.numeric(ord)) %>% arrange(Plate,ord)
#
#fin
#end
#mod
#mab
# standard curve data
std
# estimated ab unit data
mdo
# estimated parameters per standard curve
mod_bt
# predicted model per standard curve
new
#std
ctr <- end %>% filter(Type=="ctr")
#ctr %>% filter(Plate==levels(phe$Plate)[1] & ID=="C+") %>% .$OD
#std %>% filter(Plate==levels(phe$Plate)[1] & ID=="Blank") %>% .$OD
mdo %>%
ggplot(aes(mean.OD,cv.OD,colour=Plate)) +
geom_point() +
coord_cartesian(ylim = c(0,100)) +
geom_hline(aes(yintercept=20),linetype="dashed",size=0.3) +
geom_vline(aes(xintercept=0.25),linetype="dashed",size=0.3) +
facet_wrap(~Plate,ncol = 4) +
labs(title="QC plot: Intra-plate coefficient of variation")
std %>%
mutate(Ab.unit=ifelse(Ab.unit==0,0.5,Ab.unit)) %>%
ggplot(aes(Ab.unit,OD)) +
geom_hline(aes(yintercept=OD,linetype=ID),data=ctr) +
geom_point(aes(colour=Plate)) +
geom_ribbon(data=new, aes(x=resp, y=p, ymin=pmin, ymax=pmax), alpha=0.2) +
geom_line(data=new, aes(x=resp, y=p, colour=Plate)) +
#coord_trans(x="log") +
scale_x_log10() +
#theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
facet_wrap(~Plate,ncol = 4) +
labs(title="QC plot: 4-parameter log-logistic model per plate")
#xlab("Ferulic acid (mM)") + ylab("Root length (cm)")
std %>%
ggplot(aes(Ab.unit,OD,colour=Plate)) +
geom_point() +
facet_wrap(~Plate)
std %>%
ggplot(aes(log10(Ab.unit),OD,colour=Plate)) +
geom_hline(aes(yintercept=OD,linetype=ID),ctr,size=0.3) +
geom_point() +
facet_wrap(~Plate)
mdo %>%
ggplot(aes(Ab.units,mean.OD,colour=igg)) +
coord_cartesian(ylim = c(0,1)) +
geom_point() +
geom_errorbarh(aes(xmin=Lower,xmax=Upper), colour="black", size=.2) +
scale_x_log10() +
facet_wrap(~Plate+pheno,nrow = 2) +
labs(title="Estimates of antibody units (AU) per plate and phenotype")
# inverse regression method
#facet_wrap(igg~pheno,nrow = 2)
##### ab units
a <- mdo %>%
ggplot(aes(x=Ab.units,fill=pheno)) + theme_bw() #+
#scale_x_log10()
b <- a +
geom_histogram(alpha=.5,position = "identity") + facet_grid(~igg)
#ggsave("data-raw/raw/unap-tesis/RAFAEL-data/ab_hist.png")
c <- a +
geom_density(alpha=.5,position = "identity") + facet_grid(~igg)
#ggsave("data-raw/raw/unap-tesis/RAFAEL-data/ab_dens.png")
#facet_wrap(transform~measure,scales = "free")
Rmisc::multiplot(b,c,cols = 1)
##### ab units
a <- mdo %>%
ggplot(aes(x=Ab.units,fill=pheno)) + theme_bw() #+
#scale_x_log10()
b <- a +
geom_histogram(alpha=.5,position = "identity") +
facet_wrap(~igg, scale= "free", ncol = 5)
#ggsave("data-raw/raw/unap-tesis/RAFAEL-data/ab_hist.png")
c <- a +
geom_density(alpha=.5,position = "identity") +
facet_wrap(~igg, scale= "free", ncol = 5)
#ggsave("data-raw/raw/unap-tesis/RAFAEL-data/ab_dens.png")
#facet_wrap(transform~measure,scales = "free")
Rmisc::multiplot(b,c,cols = 1)
##### od
a <- mdo %>%
ggplot(aes(x=mean.OD,fill=pheno)) + theme_bw() #+
#scale_x_log10()
b <- a +
geom_histogram(alpha=.5,position = "identity") + facet_grid(~igg)
#ggsave("data-raw/raw/unap-tesis/RAFAEL-data/od_hist.png")
c <- a +
geom_density(alpha=.5,position = "identity") + facet_grid(~igg)
#ggsave("data-raw/raw/unap-tesis/RAFAEL-data/od_dens.png")
#facet_wrap(transform~measure,scales = "free")
Rmisc::multiplot(b,c,cols = 1)
##### ab units
a <- mdo %>%
ggplot(aes(x=Ab.units,fill=pheno)) + theme_bw() +
scale_x_log10()
b <- a +
geom_histogram(alpha=.5,position = "identity") + facet_grid(~igg)
#ggsave("data-raw/raw/unap-tesis/RAFAEL-data/ab_hist.png")
c <- a +
geom_density(alpha=.5,position = "identity") + facet_grid(~igg)
#ggsave("data-raw/raw/unap-tesis/RAFAEL-data/ab_dens.png")
#facet_wrap(transform~measure,scales = "free")
Rmisc::multiplot(b,c,cols = 1)
##### ab units
a <- mdo %>%
ggplot(aes(x=Ab.units,fill=pheno)) + theme_bw() +
scale_x_log10()
b <- a +
geom_histogram(alpha=.5,position = "identity") +
facet_wrap(~igg, scale= "free", ncol = 5)
#ggsave("data-raw/raw/unap-tesis/RAFAEL-data/ab_hist.png")
c <- a +
geom_density(alpha=.5,position = "identity") +
facet_wrap(~igg, scale= "free", ncol = 5)
#ggsave("data-raw/raw/unap-tesis/RAFAEL-data/ab_dens.png")
#facet_wrap(transform~measure,scales = "free")
Rmisc::multiplot(b,c,cols = 1)
##### od
a <- mdo %>%
ggplot(aes(x=mean.OD,fill=pheno)) + theme_bw() +
scale_x_log10()
b <- a +
geom_histogram(alpha=.5,position = "identity") + facet_grid(~igg)
#ggsave("data-raw/raw/unap-tesis/RAFAEL-data/od_hist.png")
c <- a +
geom_density(alpha=.5,position = "identity") + facet_grid(~igg)
#ggsave("data-raw/raw/unap-tesis/RAFAEL-data/od_dens.png")
#facet_wrap(transform~measure,scales = "free")
Rmisc::multiplot(b,c,cols = 1)
cova <- readxl::read_xlsx("data-raw/raw/unap-tesis/RAFAEL-data/Base Rafael.xlsx",
range = "A1:P59",
sheet = 1) %>%
slice(-1) %>%
dplyr::rename(EDAD="X__1",SEXO="X__2") %>%
rename_all(funs(stringr::str_to_lower(.))) %>%
dplyr::select(codigo:gametocitos) %>%
dplyr::select(codigo,condicion="condición",edad,sexo,comunidad,fiebre)
covb <- readxl::read_xlsx("data-raw/raw/unap-tesis/RAFAEL-data/DENSIDADES.xlsx",
range = "A1:E59",
sheet = 1) %>%
slice(-1) %>%
rename_all(funs(stringr::str_to_lower(.))) %>%
dplyr::select(1:4) %>%
dplyr::rename(par="parástios/ul de sangre") %>%
dplyr::select(-4) %>%
dplyr::rename(condicion="condición")
edad y sexo.
parasitemiapar=0¿variable condición?
SOLVE THIS!! AFFECTS HERE
covx <- full_join(cova, covb, by="codigo")
covx %>%
dplyr::count(codigo) %>% arrange(desc(n)) %>% filter(n!=1)
#covx %>% filter(codigo=="2235" | codigo=="3053" | codigo=="9165" | codigo=="9801") %>% arrange(codigo) %>%
# dplyr::select(codigo,edad,sexo,par)
cova %>% filter(codigo=="2235" | codigo=="3053" | codigo=="9165" | codigo=="9801") %>% arrange(codigo) %>%
dplyr::select(codigo,edad,sexo)
covb %>% filter(codigo=="2235" | codigo=="3053" | codigo=="9165" | codigo=="9801") %>% arrange(codigo) %>%
dplyr::select(codigo,par)
# par covariates finale
covf <- covb %>% filter(!(codigo=="2235" & par==0 | codigo=="3053" & par==0 | codigo=="9165" & par==0 | codigo=="9801" & par==0)) %>%
dplyr::rename(ID=codigo)
mdo %>% dplyr::count(ID)
mco <- full_join(mdo,covf,by="ID") %>% #dplyr::count(pheno,condicion)
dplyr::select(-condicion) %>%
mutate(Ab.units_log=log10(Ab.units))
mcv <- covx %>%
#filter(codigo=="2235" | codigo=="3053" | codigo=="9165" | codigo=="9801") %>%
filter(!(codigo=="2235" |
codigo=="3053" |
codigo=="9165" |
codigo=="9801")) %>%
#filter(codigo=="2235" | codigo=="3053" | codigo=="9165" | codigo=="9801") %>%
arrange(codigo) %>%
dplyr::select(codigo,edad,sexo,par) %>%
dplyr::rename(ID=codigo) %>%
full_join(mdo %>%
group_by(ID) %>%
slice(1) %>%
ungroup()
,by="ID") %>%
dplyr::select(ID,edad,sexo,par,pheno) %>%
#mutate(sexo=forcats::fct_recode(sexo,
# "sex1"="1", "sex0"="0")) %>%
mutate(sexo=as.factor(sexo),
pheno=as.factor(pheno),
edad=as.numeric(edad))
mcv_u <- Hmisc::upData(mcv %>% dplyr::select(-ID),
labels = c(edad="Age",
sexo="Sex",
par="Parasite density"
),
units = c(edad="(years)",
par="(par/uL)"
),
levels = list(pheno=list("Asymptomatic"="asymptomatic",
"Symptomatic"="symptomatic"),
sexo=list("Female"="0",
"Male"="1") #???
)
)
Input object size: 3344 bytes; 4 variables 53 observations
New object size: 4608 bytes; 4 variables 53 observations
Hmisc::html(Hmisc::contents(mcv_u), maxlevels=10, levelType='table')
| Name | Labels | Units | Levels | Class | Storage | NAs |
|---|---|---|---|---|---|---|
| edad | Age | (years) | integer | integer | 4 | |
| sexo | Sex | 2 | integer | 4 | ||
| par | Parasite density | (par/uL) | integer | integer | 4 | |
| pheno | 2 | integer | 0 |
| Variable | Levels |
|---|---|
| sexo | Female |
| Male | |
| pheno | Asymptomatic |
| Symptomatic |
s1 <- Hmisc::summaryM(sexo + edad + par ~ pheno,
data=mcv_u,
overall=FALSE, test=TRUE)
Error: `x` must be a numeric or a character vector
Hmisc::latex(s1, caption='Sample covariates',
exclude1=TRUE, #npct='both',
test=TRUE , prtest="P",file="",
digits=3, prn=FALSE,
#prmsd=TRUE, brmsd=TRUE, #msdsize=mu$smaller2, #NOT-EVALUATE if PDF
middle.bold=TRUE, long = FALSE,
#legend.bottom = TRUE, #insert.bottom = TRUE,
what="%", html = TRUE,
width="100%"
) #change here for LaTeX PDF
Error in Hmisc::latex(s1, caption = "Sample covariates", exclude1 = TRUE, :
object 's1' not found
readr::write_rds(mcv_u,"data/unap-rafa-covar.rds")
a <- mco %>% #filter(igg=="igg3") %>%
ggplot(aes(x=Ab.units
#,fill=pheno
#,fill=igg
)) + theme_bw() +
#scale_x_log10() +
geom_density(alpha=.5,position = "identity") +
geom_histogram(aes(x=Ab.units,..density..),
alpha=.5,position = "identity") +
#theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(title="AU linear distribution")
b <- mco %>% #filter(igg=="igg3") %>%
ggplot(aes(sample=Ab.units
#,fill=pheno
#,fill=igg
)) +
geom_qq(alpha=.2) +
geom_qq_line(line.p = c(0.25, 0.75)) +
labs(title="Gaussian quantile-quantile plot") +
coord_cartesian(ylim = c(0,2000))
Rmisc::multiplot(a,b,cols = 2)
mco %>%
ggplot(aes(x=Ab.units
#,fill=pheno
)) + theme_bw() +
#scale_x_log10() +
geom_density(alpha=.5,position = "identity",adjust= 1/2
) +
geom_histogram(aes(x=Ab.units,..density..),
alpha=.5,position = "identity") +
facet_wrap(~igg, scales = "free",ncol = 5) +
labs(title="AU linear distribution per IgG subtype")
# source: http://tinyheero.github.io/2015/10/13/mixture-model.html
f <- mco %>% mutate(igg=as.factor(igg)) %>%
mutate(igg=forcats::fct_relevel(igg,"igg","igg1","igg2","igg3")) %>% .$igg
#library("mixtools")
#' Plot a Mixture Component
#'
#' @param x Input data
#' @param mu Mean of component
#' @param sigma Standard deviation of component
#' @param lam Mixture weight of component
plot_mix_comps <- function(x, mu, sigma, lam) {
lam * dnorm(x, mu, sigma)
}
set.seed(1)
#length(levels(f))
#wait <- mco %>% filter(igg=="igg2") %>%
# .$Ab.units %>% log()
llmix <- data_frame(igg=as.character(),
kpr=as.numeric(),
lik=as.numeric())
for (j in 2:3) {
mixmdl_p <- normalmixEM(mco %>%
#filter(igg==levels(f)[i]) %>%
.$Ab.units #%>% log10()
,
k = j)
llmix <- llmix %>%
union(data_frame(igg="all",#levels(f)[i],
kpr=j,
lik=mixmdl_p$loglik))
}
number of iterations= 37
number of iterations= 60
llmix %>% arrange(igg,desc(lik)) %>% mutate(aic=2*kpr-2*lik)
#
llmix <- data_frame(igg=as.character(),
kpr=as.numeric(),
lik=as.numeric())
for (i in 1:length(levels(f))) {
for (j in 2:3) {
mixmdl_p <- normalmixEM(mco %>%
filter(igg==levels(f)[i]) %>%
.$Ab.units #%>% log10()
,
k = j)
llmix <- llmix %>%
union(data_frame(igg=levels(f)[i],
kpr=j,
lik=mixmdl_p$loglik))
}
}
number of iterations= 23
One of the variances is going to zero; trying new starting values.
One of the variances is going to zero; trying new starting values.
One of the variances is going to zero; trying new starting values.
number of iterations= 81
number of iterations= 60
number of iterations= 68
number of iterations= 36
number of iterations= 79
number of iterations= 20
number of iterations= 527
number of iterations= 15
number of iterations= 40
llmix %>% arrange(igg,desc(lik)) %>% mutate(aic=2*kpr-2*lik) #%>%
#group_by(igg) %>% filter(lik==max(lik))
#mixmdl <- normalmixEM(wait, k = 3)
#mixmdl$loglik
### FOR ALL AB.UNITS (NO IGG SUBTYPES)
set.seed(1)
#for (i in 1:length(levels(f))) {
wait <- mco %>% #filter(igg==levels(f)[i]) %>%
.$Ab.units #%>% log10()
mixmdl <- normalmixEM(wait, k = 3)
number of iterations= 41
###
r <- data.frame(x = mixmdl$x) %>%
ggplot() +
geom_histogram(aes(x, ..density..),
#binwidth = 1,
#colour = "black",
#fill = "gray",
alpha=.5,position = "identity"
) +
stat_function(geom = "line",
fun = plot_mix_comps,
args = list(mixmdl$mu[1],
mixmdl$sigma[1],
lam = mixmdl$lambda[1]),
colour = "green", lwd = 1.5) +
stat_function(geom = "line",
fun = plot_mix_comps,
args = list(mixmdl$mu[2],
mixmdl$sigma[2],
lam = mixmdl$lambda[2]),
colour = "blue", lwd = 1.5) +
stat_function(geom = "line",
fun = plot_mix_comps,
args = list(mixmdl$mu[3],
mixmdl$sigma[3],
lam = mixmdl$lambda[3]),
colour = "red", lwd = 1.5) +
ylab("Density") +
xlab("Ab.units") +
labs(title= paste0("Ab.units: ",
#ifelse(levels(f)[i]=="igg","IgG ",
# ifelse(levels(f)[i]=="igg1","IgG1 ",
# ifelse(levels(f)[i]=="igg2","IgG2 ",
# ifelse(levels(f)[i]=="igg3","IgG3 ","IgG4 ")
# )
# )
# ),
"3-component distribution"
#,": LogLik=",
#mixmdl$loglik %>% format(digits=3)
))
#+ scale_x_log10()
####
u <- 0.90 # 90% classification probability
post.df <- as.data.frame(cbind(x = mixmdl$x, mixmdl$posterior)) %>%
mutate(comp.12=comp.1+comp.2) %>% # sum probabilities of s+ and s++
mutate(label = ifelse(comp.3 > u, "s-",
ifelse(comp.12 > u, "s+", "s0"
#ifelse(comp.1 > u,"s++","s0")
))) %>%
mutate(label=forcats::fct_relevel(label,"s-","s0","s+"#,"s++"
))
s <- post.df %>%
ggplot(aes(x = factor(label))) +
geom_bar() +
xlab("Component") +
ylab("Number of Data Points") +
labs(title="Classification")
###
t <- post.df %>%
ggplot() +
#geom_line(aes(x,comp.1), colour="green", lwd = 1.5) +
#geom_line(aes(x,comp.2), colour="blue", lwd = 1.5) +
geom_line(aes(x,comp.12), colour="blue", lwd = 1.5) +
geom_line(aes(x,comp.3), colour="red", lwd = 1.5) +
geom_hline(yintercept = u, col = "black") +
#geom_vline(xintercept = cutoffs[2], col = "black", lty=3) +
#geom_vline(xintercept = cutoffs[1], col = "black", lty=3) +
xlab("Ab.units") +
ylab("classification probability") +
labs(title="Cutoff")
Rmisc::multiplot(r,t,s,cols = 3)
#}
#sum(mco$Ab.units_log == mixmdl$x)
#length(mixmdl$x)
#sum(!post.df$x==mco$Ab.units_log)
msr <- inner_join(mco %>% rownames_to_column(),
post.df %>%
#dplyr::rename(Ab.units_log=x) %>%
dplyr::select(#Ab.units_log,
label) %>%
rownames_to_column(),
by="rowname") #%>%
#mutate(test= Ab.units_log.x==Ab.units_log.y) %>% dplyr::count(test)
msr <- msr[FALSE,] #%>% glimpse()
set.seed(1)
for (i in 1:length(levels(f))) {
wait <- mco %>% filter(igg==levels(f)[i]) %>%
.$Ab.units #%>% log10()
mixmdl <- normalmixEM(wait, k = 3)
###
r <- data.frame(x = mixmdl$x) %>%
ggplot() +
geom_histogram(aes(x, ..density..),
#binwidth = 1,
#colour = "black",
#fill = "gray",
alpha=.5,position = "identity"
) +
stat_function(geom = "line",
fun = plot_mix_comps,
args = list(mixmdl$mu[1],
mixmdl$sigma[1],
lam = mixmdl$lambda[1]),
colour = "red", lwd = 1.5) +
stat_function(geom = "line",
fun = plot_mix_comps,
args = list(mixmdl$mu[2],
mixmdl$sigma[2],
lam = mixmdl$lambda[2]),
colour = "blue", lwd = 1.5) +
#stat_function(geom = "line",
# fun = plot_mix_comps,
# args = list(mixmdl$mu[3],
# mixmdl$sigma[3],
# lam = mixmdl$lambda[3]),
# colour = "green", lwd = 1.5) +
ylab("Density") +
xlab("Ab.units") +
labs(title= paste0(ifelse(levels(f)[i]=="igg","IgG: ",
ifelse(levels(f)[i]=="igg1","IgG1: ",
ifelse(levels(f)[i]=="igg2","IgG2: ",
ifelse(levels(f)[i]=="igg3","IgG3 ","IgG4: ")
)
)
),
"3-component distribution"
#,": LogLik=",
#mixmdl$loglik %>% format(digits=3)
))
#+ scale_x_log10()
####
u <- 0.90 # 90% classification probability
post.df <- as.data.frame(cbind(x = mixmdl$x, mixmdl$posterior)) %>%
mutate(comp.sp=ifelse(mean(comp.2)>100,
comp.2+comp.3,
comp.1+comp.2)) %>% # sum probabilities of s+ and s++
mutate(label = ifelse(comp.1 > u, "s-",
#ifelse(comp.sp > u, "s+", "s0"
ifelse(comp.2 > u, "s+", "s0"
#ifelse(comp.1 > u,"s++","s0")
))) %>%
mutate(label=forcats::fct_relevel(label,"s-","s0","s+"#,"s++"
))
s <- post.df %>%
ggplot(aes(x = factor(label))) +
geom_bar() +
xlab("Component") +
ylab("Number of Data Points") +
labs(title="Classification")
###
t <- post.df %>%
ggplot() +
#geom_line(aes(x,comp.1), colour="green", lwd = 1.5) +
geom_line(aes(x,comp.2), colour="blue", lwd = 1.5) +
#geom_line(aes(x,comp.sp), colour="blue", lwd = 1.5) +
geom_line(aes(x,comp.1), colour="red", lwd = 1.5) +
geom_hline(yintercept = u, col = "black") +
#geom_vline(xintercept = 63.6, col = "black", lty=3) +
#geom_vline(xintercept = 69.7, col = "black", lty=3) +
xlab("Ab.units") +
ylab("classification probability") +
labs(title="Cutoff")
###
msr_p <- inner_join(mco %>%
filter(igg==levels(f)[i]) %>%
rownames_to_column(),
post.df %>%
#dplyr::rename(Ab.units_log=x) %>%
dplyr::select(#Ab.units_log,
label) %>%
rownames_to_column(),
by="rowname") #%>%
#mutate(test= Ab.units_log.x==Ab.units_log.y) %>% dplyr::count(test)
msr <- union(msr, msr_p)
Rmisc::multiplot(r,t,s,cols = 3)
}
igg subtypes.igg2 and igg4 seropositivity.
s0igg4 in order to explain its:
s0, which is higher than the previous threshold, this is not the best classification.igg the threshold of seropositivity is lower than 40 AU, under the right criteria.set.seed(1)
# i= 3
for (i in 1:length(levels(f))) {
wait <- mco %>% filter(igg==levels(f)[i]) %>%
.$Ab.units #%>% log10()
mixmdl <- normalmixEM(wait, k = 3)
###
r <- data.frame(x = mixmdl$x) %>%
ggplot() +
geom_histogram(aes(x, ..density..),
#binwidth = 1,
#colour = "black",
#fill = "gray",
alpha=.5,position = "identity"
) +
stat_function(geom = "line",
fun = plot_mix_comps,
args = list(mixmdl$mu[1],
mixmdl$sigma[1],
lam = mixmdl$lambda[1]),
colour = "red", lwd = 1.5) +
stat_function(geom = "line",
fun = plot_mix_comps,
args = list(mixmdl$mu[2],
mixmdl$sigma[2],
lam = mixmdl$lambda[2]),
colour = "blue", lwd = 1.5) +
stat_function(geom = "line",
fun = plot_mix_comps,
args = list(mixmdl$mu[3],
mixmdl$sigma[3],
lam = mixmdl$lambda[3]),
colour = "green", lwd = 1.5) +
ylab("Density") +
xlab("Ab.units") +
labs(title= paste0(ifelse(levels(f)[i]=="igg","IgG: ",
ifelse(levels(f)[i]=="igg1","IgG1: ",
ifelse(levels(f)[i]=="igg2","IgG2: ",
ifelse(levels(f)[i]=="igg3","IgG3 ","IgG4: ")
)
)
),
"3-component distribution"
#,": LogLik=",
#mixmdl$loglik %>% format(digits=3)
))
#+ scale_x_log10()
####
u <- 0.90 # 90% classification probability
post.df <- as.data.frame(cbind(x = mixmdl$x, mixmdl$posterior)) %>%
#mutate(comp.12=comp.1+comp.2,
# comp.23=comp.2+comp.3) %>%
mutate(comp.sp=if_else(rep(mixmdl$mu[2]>40,length(mixmdl$x)), # UMBRAL ARBITRARIO!!
comp.2+comp.3, # sero+ equals to the sum of comp 2+3
comp.3)) %>% # sero+ equals to the sum of comp 3
mutate(comp.sn=if_else(rep(mixmdl$mu[2]>40,length(mixmdl$x)),
comp.1, # sero- equals to the sum of comp 1
comp.1+comp.2)) %>% # sero+ equals to the sum of comp 1+2
mutate(label = if_else(comp.sn > u, "s-",
ifelse(comp.sp > u, "s+", "s0"
#ifelse(comp.2 > u, "s+", #"s0"
#ifelse(comp.1 > u,"s++","s0")
))) %>%
mutate(label=forcats::fct_relevel(label,"s-","s0","s+"#,"s++"
))
s <- post.df %>%
ggplot(aes(x = factor(label))) +
geom_bar() +
xlab("Component") +
ylab("Number of Data Points") +
labs(title="Classification")
###
t <- post.df %>%
ggplot() +
#geom_line(aes(x,comp.1), colour="green", lwd = 1.5) +
#geom_line(aes(x,comp.2), colour="blue", lwd = 1.5) +
#geom_point(aes(x,comp.sp), colour="blue", lwd = 1.5) +
geom_line(aes(x,comp.sp), colour="blue", lwd = 1.5) +
#geom_point(aes(x,comp.sn), colour="red", lwd = 1.5) +
geom_line(aes(x,comp.sn), colour="red", lwd = 1.5) +
geom_hline(yintercept = u, col = "black") +
#geom_vline(xintercept = 63.6, col = "black", lty=3) +
#geom_vline(xintercept = 69.7, col = "black", lty=3) +
xlab("Ab.units") +
ylab("classification probability") +
labs(title="Cutoff")
###
msr_p <- inner_join(mco %>%
filter(igg==levels(f)[i]) %>%
rownames_to_column(),
post.df %>%
#dplyr::rename(Ab.units_log=x) %>%
dplyr::select(#Ab.units_log,
label) %>%
rownames_to_column(),
by="rowname") #%>%
#mutate(test= Ab.units_log.x==Ab.units_log.y) %>% dplyr::count(test)
msr <- union(msr, msr_p)
Rmisc::multiplot(r,t,s,cols = 3)
}
One of the variances is going to zero; trying new starting values.
One of the variances is going to zero; trying new starting values.
number of iterations= 71
number of iterations= 73
number of iterations= 22
number of iterations= 475
number of iterations= 34
#mco
#msr
glimpse(
msr %>%
mutate(ord=as.numeric(ord)) %>% arrange(ord) %>%
dplyr::select(-Ab.units_log, -rowname)
)
Observations: 232
Variables: 16
$ Plate <chr> "N1", "N1", "N1", "N1", "N1", "N1", "N1", "N1", "N1", "N1", "N1"...
$ ord <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 1...
$ ID <chr> "1570", "1843", "1856", "1910", "1969", "1977", "2131", "2134", ...
$ code <chr> "1570_1", "1843_2", "1856_3", "1910_4", "1969_5", "1977_6", "213...
$ Type <chr> "unk", "unk", "unk", "unk", "unk", "unk", "unk", "unk", "unk", "...
$ pheno <chr> "asymptomatic", "symptomatic", "asymptomatic", "asymptomatic", "...
$ igg <chr> "igg", "igg", "igg", "igg", "igg", "igg", "igg", "igg", "igg", "...
$ mean.OD <dbl> 0.5485, 0.1825, 0.6260, 0.3070, 0.6220, 0.5310, 0.6615, 0.5795, ...
$ sd.OD <dbl> 0.0205060967, 0.0007071068, 0.0141421356, 0.0098994949, 0.000000...
$ cv.OD <dbl> 3.7385773, 0.3874558, 2.2591271, 3.2245912, 0.0000000, 7.1909164...
$ Ab.units <dbl> 94.496989, 9.591621, 182.796839, 23.028486, 175.224706, 83.84476...
$ `Std. Error` <dbl> 27.4859252, 1.2766621, 63.3962207, 4.1773628, 60.1332973, 23.554...
$ Lower <dbl> 32.319506, 6.703611, 39.384624, 13.578635, 39.193737, 30.560885,...
$ Upper <dbl> 156.674471, 12.479631, 326.209054, 32.478337, 311.255675, 137.12...
$ par <dbl> 4020, 1960, 1078, 406, 0, 2716, 498, 4866, 1094, 2010, 621, 824,...
$ label <fctr> s+, s0, s+, s+, s+, s+, s+, s+, s-, s+, s+, s+, s-, s+, s+, s+,...
#summary(mdo)
readr::write_csv(
msr %>%
mutate(ord=as.numeric(ord)) %>% arrange(ord) %>%
dplyr::select(-Ab.units_log, -rowname)
,"data/unap-rafael.csv")
readr::write_rds(
msr %>%
mutate(ord=as.numeric(ord)) %>% arrange(ord) %>%
dplyr::select(-Ab.units_log, -rowname)
, "data/unap-rafael.rds")
mco %>%
group_by(igg) %>% dplyr::count(pheno)
f <- mco %>% mutate(igg=as.factor(igg)) %>% .$igg
#mco <- mco %>%
# #dplyr::select(Ab.units,par) %>%
# mutate(Ab.units=log10(Ab.units),par=log10(par)) %>%
# filter(par!=-Inf) #%>%
stt <- data_frame(##estimate=as.double(),
##estimate1=as.double(),
##estimate2=as.double(),
statistic=as.double(),
p.value=as.double(),
#parameter=as.double(),#
#conf.low=as.double(),#
#conf.high=as.double(),#
method=as.factor(NULL),
alternative=as.factor(NULL),
igg=as.character(),
lab=as.character())
for (l in 1:length(levels(f))) {
## Primero, Prueba de Hipótesis para comparar varianzas:
i <- mco %>% filter(igg==levels(f)[l]) %>%
var.test(Ab.units_log ~ pheno, data = .) %>% broom::tidy()
## Rpta: Bajo n.s. 0.05, F cae en Región de no-Rechazo de Hipótesis Nula (RnoRHo)
## Conclusión: Supuesto de igualdad de varianzas poblacionales SÍ es válido
## Segundo, Prueba de Hipótesis para comparar medias:
j <- mco %>% filter(igg==levels(f)[l]) %>%
#t.test(Ab.units_log ~ pheno, data = ., var.equal= i$p.value>0.05) %>%
#wilcox.test(Ab.units_log ~ pheno, data = .) %>%#,
wilcox.test(Ab.units ~ pheno, data = .) %>%#,
#conf.int=TRUE) %>%
broom::tidy() %>% #%>% format(digits=2)
dplyr::select(-starts_with("estimate")) %>%
mutate(igg=levels(f)[l],
lab= paste0(#"t=",#should be done -> mix variance equality test
#"W=",#non-parametric used in literature
#statistic %>% format(digits=2),
#", df=",parameter %>% format(digits=2),
#", P",ifelse(p.value<0.001,"<0.001",
"italic('P')",ifelse(p.value<0.001,"<0.001",
paste0("==",p.value %>% format(digits=2))
)
)
)
stt <- stt %>% union(j)
}
stt <- stt %>% arrange(igg) %>% dplyr::select(igg,everything()) %>%
mutate(x=.5,
y=.15) %>%
mutate(igg=forcats::fct_recode(igg,
"IgG"="igg",
"IgG1"="igg1",
"IgG2"="igg2",
"IgG3"="igg3",
"IgG4"="igg4"
))
# plot
mco %>% #filter(igg==levels(f)[1]) %>%
mutate(igg=forcats::fct_recode(igg,
"IgG"="igg",
"IgG1"="igg1",
"IgG2"="igg2",
"IgG3"="igg3",
"IgG4"="igg4"
),
pheno=forcats::fct_recode(pheno,
"Asymptomatic"="asymptomatic",
"Symptomatic"="symptomatic")) %>%
ggplot(aes(pheno,Ab.units)) +
geom_boxplot(#position=position_dodge(0.8)
) +
geom_dotplot(#aes(fill=sev_WHO),
binaxis='y', stackdir='center', alpha=.3,
dotsize=1#, position=position_dodge(0.8)
) +
facet_wrap(~igg,ncol = 5) +
geom_text(aes(x,y,label=lab),data = stt,parse = T,
#vjust=.5,hjust=0.05,size=3.5) +
vjust=.5,hjust=0.05,size=3.5) + # if log10, then change it.
#coord_cartesian(ylim = c(-300,2000)) + # if log10, then change it.
scale_y_log10() +
#labs(title="Test AU mean equality among phenotypes per IgG subtypes") +
xlab("Carrier") + ylab("Antibody units (log scale)")
msr %>% group_by(igg,pheno) %>% dplyr::summarise(min=min(Ab.units),
q25=quantile(Ab.units) %>% .[2],
mean=mean(Ab.units),
q50=quantile(Ab.units) %>% .[3],
q75=quantile(Ab.units) %>% .[4],
max=max(Ab.units))
msr %>% dplyr::count(igg,label) %>%
group_by(igg) %>% mutate(tot=sum(n),prop= (n*100)/(sum(n))) %>%
filter(label=="s+")
msr %>%
mutate(label=forcats::fct_relevel(label,"s-","s+","s0"#,"s++"
)) %>%
ggplot(aes(x=igg,fill=label)) +
geom_bar(position = "fill") +
#facet_grid(~igg) +
labs(title="Proportion of seropositives per IgG subtype")
msr %>% dplyr::count(igg,pheno,label) %>%
group_by(igg,pheno) %>% mutate(tot=sum(n),prop= (n*100)/(sum(n))) %>%
filter(label=="s+") #%>%
#ungroup() %>%
#dplyr::select(-n,-label, -tot) %>%
#spread(pheno,prop)
msr %>%
dplyr::select(pheno,igg,label) %>%
#filter(igg==levels(f)[i]) %>%
#dplyr::select(-igg) %>%
group_by(igg,pheno,label) %>% dplyr::summarise(n=n()) %>% ungroup() %>%
spread(label,n)
ftt <- data_frame(igg=as.character(),
p.value=as.double(),
#estimate=as.double(),
method=as.character()#,
#alternative=as.factor()
)
for (i in 1:length(levels(f))) {
fst <- msr %>%
filter(!label=="s0") %>%
dplyr::select(pheno,igg,label) %>%
filter(igg==levels(f)[i]) %>%
dplyr::select(-igg) %>%
group_by(pheno,label) %>% dplyr::summarise(n=n()) %>% ungroup() %>%
#spread(label,n) %>% as.matrix() %>%
reshape2::acast(pheno ~ label,
value.var = "n") #%>% #class()
fst[is.na(fst)] <- 0
ftt <- ftt %>%
union(
fisher.test(fst) %>% broom::tidy() %>%
mutate(igg=levels(f)[i]) %>%
dplyr::select(igg,p.value,
#estimate,
method#,alternative
)
)
}
ftt %>% arrange(igg) %>%
mutate(sig=ifelse(p.value<0.05,"signif","not-signif"))
fttt <- ftt %>% arrange(igg) %>% dplyr::select(igg,everything()) %>%
mutate(x=.5,
y=-.05) %>%
mutate(lab= paste0("italic('P')",ifelse(p.value<0.001,"<0.001",
paste0("==",p.value %>% format(digits=2))
)
)
) %>%
mutate(lab=stringr::str_replace(lab,"(P = 0\\...)(.+)","\\1")) %>%
mutate(igg=forcats::fct_recode(igg,
"IgG"="igg",
"IgG1"="igg1",
"IgG2"="igg2",
"IgG3"="igg3",
"IgG4"="igg4"
))
msr %>%
mutate(igg=forcats::fct_recode(igg,
"IgG"="igg",
"IgG1"="igg1",
"IgG2"="igg2",
"IgG3"="igg3",
"IgG4"="igg4"
),
pheno=forcats::fct_recode(pheno,
"Asymptomatic"="asymptomatic",
"Symptomatic"="symptomatic")) %>%
mutate(label=forcats::fct_relevel(label,"s-","s+","s0"#,"s++"
)) %>%
full_join(fttt %>% dplyr::select(igg,lab,y,x),by = "igg") %>%
mutate(lab=if_else(label=="s+",lab,NA_character_)) %>%
arrange(lab) %>%
mutate(lab=if_else(igg==lead(igg),NA_character_,lab)) %>%
filter(!label=="s0") %>%
ggplot(aes(x=pheno,fill=label)) +
geom_bar(position = "fill") +
facet_grid(~igg) +
coord_cartesian(ylim = c(-.12,1)) +
geom_text(aes(x=x,
y=y,label=lab),parse = T,#position = "fill",#data = fttt,,
##vjust=.5,hjust=0.05,size=3.5) +
vjust=1,hjust=0.0005,size=3.5
) + # if log10, then change it.
#labs(title="Proportion of seropositives among phenotypes per IgG subtype") +
xlab("Carrier") + ylab("Proportion") +
scale_fill_grey(#start = 0, end = .9,
name="Serology",labels=c("Negative","Positive"#,"Indeterminate"
))
##aqui
s1 <- Hmisc::summaryM(label
~ pheno,
data=msr %>% filter(igg=="igg"),
overall=FALSE, test=TRUE)
Hmisc::latex(s1, caption='Sample covariates',
exclude1=TRUE, npct='both',
digits=3,
#prmsd=TRUE, brmsd=TRUE, #msdsize=mu$smaller2, #NOT-EVALUATE if PDF
middle.bold=TRUE, long = TRUE,
#legend.bottom = TRUE, #insert.bottom = TRUE,
what="%", html = TRUE, width="100%"
) #change here for LaTeX PDF
| Sample covariates. | |||
| asymptomatic N=27 |
symptomatic N=29 |
Test Statistic |
|
|---|---|---|---|
| label | χ22=3.41, P=0.182 | ||
| s- | 7% 2⁄27 | 24% 7⁄29 | |
| s0 | 4% 1⁄27 | 7% 2⁄29 | |
| s+ | 89% 24⁄27 | 69% 20⁄29 | |
|
Test used: Pearson test . | |||
##aqui
s1 <- Hmisc::summaryM(label
~ pheno,
data=msr %>% filter(igg=="igg1"),
overall=FALSE, test=TRUE)
Hmisc::latex(s1, caption='Sample covariates',
exclude1=TRUE, npct='both',
digits=3,
#prmsd=TRUE, brmsd=TRUE, #msdsize=mu$smaller2, #NOT-EVALUATE if PDF
middle.bold=TRUE, long = TRUE,
#legend.bottom = TRUE, #insert.bottom = TRUE,
what="%", html = TRUE, width="100%"
) #change here for LaTeX PDF
| Sample covariates. | |||
| asymptomatic N=24 |
symptomatic N=20 |
Test Statistic |
|
|---|---|---|---|
| label | χ22=2.18, P=0.336 | ||
| s- | 12% 3⁄24 | 30% 6⁄20 | |
| s0 | 17% 4⁄24 | 10% 2⁄20 | |
| s+ | 71% 17⁄24 | 60% 12⁄20 | |
|
Test used: Pearson test . | |||
##aqui
s1 <- Hmisc::summaryM(label
~ pheno,
data=msr %>% filter(igg=="igg2"),
overall=FALSE, test=TRUE)
Hmisc::latex(s1, caption='Sample covariates',
exclude1=TRUE, npct='both',
digits=3,
#prmsd=TRUE, brmsd=TRUE, #msdsize=mu$smaller2, #NOT-EVALUATE if PDF
middle.bold=TRUE, long = TRUE,
#legend.bottom = TRUE, #insert.bottom = TRUE,
what="%", html = TRUE, width="100%"
) #change here for LaTeX PDF
| Sample covariates. | |||
| asymptomatic N=24 |
symptomatic N=20 |
Test Statistic |
|
|---|---|---|---|
| label | χ22=4.73, P=0.094 | ||
| s- | 83% 20⁄24 | 70% 14⁄20 | |
| s0 | 8% 2⁄24 | 0% 0⁄20 | |
| s+ | 8% 2⁄24 | 30% 6⁄20 | |
|
Test used: Pearson test . | |||
##aqui
s1 <- Hmisc::summaryM(label
~ pheno,
data=msr %>% filter(igg=="igg3"),
overall=FALSE, test=TRUE)
Hmisc::latex(s1, caption='Sample covariates',
exclude1=TRUE, npct='both',
digits=3,
#prmsd=TRUE, brmsd=TRUE, #msdsize=mu$smaller2, #NOT-EVALUATE if PDF
middle.bold=TRUE, long = TRUE,
#legend.bottom = TRUE, #insert.bottom = TRUE,
what="%", html = TRUE, width="100%"
) #change here for LaTeX PDF
| Sample covariates. | |||
| asymptomatic N=24 |
symptomatic N=20 |
Test Statistic |
|
|---|---|---|---|
| label | χ22=1.59, P=0.451 | ||
| s- | 29% 7⁄24 | 20% 4⁄20 | |
| s0 | 0% 0⁄24 | 5% 1⁄20 | |
| s+ | 71% 17⁄24 | 75% 15⁄20 | |
|
Test used: Pearson test . | |||
##aqui
s1 <- Hmisc::summaryM(label
~ pheno,
data=msr %>% filter(igg=="igg4"),
overall=FALSE, test=TRUE)
Hmisc::latex(s1, caption='Sample covariates',
exclude1=TRUE, npct='both',
digits=3,
#prmsd=TRUE, brmsd=TRUE, #msdsize=mu$smaller2, #NOT-EVALUATE if PDF
middle.bold=TRUE, long = TRUE,
#legend.bottom = TRUE, #insert.bottom = TRUE,
what="%", html = TRUE, width="100%"
) #change here for LaTeX PDF
| Sample covariates. | |||
| asymptomatic N=24 |
symptomatic N=20 |
Test Statistic |
|
|---|---|---|---|
| label | χ22=25.4, P<0.001 | ||
| s- | 54% 13⁄24 | 0% 0⁄20 | |
| s0 | 21% 5⁄24 | 0% 0⁄20 | |
| s+ | 25% 6⁄24 | 100% 20⁄20 | |
|
Test used: Pearson test . | |||
msr %>%
mutate(label=forcats::fct_relevel(label,"s-","s+","s0"#,"s++"
)) %>%
ggplot(aes(x=Ab.units,fill=label)) +
#geom_bar(position = "fill") +
geom_histogram(position = "stack") +
facet_grid(~igg,scales = "free") +
labs(title="Proportion of seropositives")
msr %>%
filter(label=="s+") %>%
group_by(igg) %>% dplyr::summarise(min=min(Ab.units),
q25=quantile(Ab.units) %>% .[2],
mean=mean(Ab.units),
q50=quantile(Ab.units) %>% .[3],
q75=quantile(Ab.units) %>% .[4],
max=max(Ab.units))
msr %>%
mutate(label=forcats::fct_relevel(label,"s-","s+","s0"#,"s++"
)) %>%
ggplot(aes(x=Ab.units,fill=label)) +
#geom_bar(position = "fill") +
#scale_x_log10() +
geom_histogram(position = "stack") +
facet_grid(pheno~igg,scales = "free") +
labs(title="Proportion of seropositives")
mco %>% dim()
[1] 232 16
u <- mco %>% filter(pheno=="asymptomatic") %>%
dplyr::select(ID,igg,Ab.units) %>%
#mutate(Ab.units=log10(Ab.units)) %>%
spread(igg,Ab.units) %>%
dplyr::select(-ID) %>%
mutate(pheno="asymptomatic") %>%
mutate(pheno=forcats::fct_recode(pheno,
"Asymptomatic"="asymptomatic",
"Symptomatic"="symptomatic")) %>%
dplyr::rename("IgG"="igg","IgG1"="igg1",
"IgG2"="igg2","IgG3"="igg3","IgG4"="igg4")
v <- mco %>% filter(pheno=="symptomatic") %>%
dplyr::select(ID,igg,Ab.units) %>%
#mutate(Ab.units=log10(Ab.units)) %>%
spread(igg,Ab.units) %>%
dplyr::select(-ID) %>%
mutate(pheno="symptomatic") %>%
mutate(pheno=forcats::fct_recode(pheno,
"Asymptomatic"="asymptomatic",
"Symptomatic"="symptomatic")) %>%
dplyr::rename("IgG"="igg","IgG1"="igg1",
"IgG2"="igg2","IgG3"="igg3","IgG4"="igg4")
#par(mfrow=c(1,3))
a <- union(u,v) %>%
dplyr::select(-pheno)
a %>%
PerformanceAnalytics::chart.Correlation(method = "spearman",main="all",histogram = F)
b <- union(u,v) %>%
filter(pheno=="Asymptomatic") %>%
dplyr::select(-pheno)
b %>%
PerformanceAnalytics::chart.Correlation(method = "spearman",main="asymptomatic",histogram = F)
c <- union(u,v) %>%
filter(pheno=="Symptomatic") %>%
dplyr::select(-pheno)
c %>%
PerformanceAnalytics::chart.Correlation(method = "spearman",main="symptomatic",histogram = F)
#a
#par(mfrow=c(1,1))
#d <- c
#d <- b
#d <- c
#cor(d[which(complete.cases(d)),],method = "spearman")
cor.ext <- function(d) {#d <- a
e <- Hmisc::rcorr(as.matrix(d[which(complete.cases(d)),]), type="spearman")
#e
rr <- e$r %>% as.data.frame() %>% mutate_all(funs(if_else(.==1,NA_real_,.))) %>%
gather(ig,vl) %>% mutate(vl=format(vl,digits = 2)) %>% mutate(vl=as.numeric(vl)) %>%
group_by(ig) %>%
mutate(id=1:n()) %>%
spread(ig,vl) %>% dplyr::select(-id) %>% rownames_to_column() #%>% dplyr::select(-rowname)
pp <- e$P %>% as.data.frame() %>% mutate_all(funs(if_else(.<0.001,111,
if_else(.<0.01,11,
if_else(.<0.05,1,0))))) %>%
mutate_all(as.character) %>%
mutate_all(funs(if_else(.=="111","(***)",
if_else(.=="11","(**)",
if_else(.=="1","(*)","(ns)"))))) %>% rownames_to_column()
ee <- rr %>% gather(ig,vl) %>% mutate(vl="") %>% group_by(ig) %>% mutate(id=1:n()) %>% spread(ig,vl) %>% dplyr::select(-id) %>% dplyr::select(rowname,everything())
ppp <- as.matrix(pp)
rrr <- as.matrix(rr)
eee <- as.matrix(ee)
#eee <- rrr
eee[1,3] <- paste(rrr[1,3],ppp[1,3])
eee[1:2,4] <- paste(rrr[1:2,4],ppp[1:2,4])
#eee[2,4] <- paste(rrr[2,4],ppp[2,4])
eee[1:3,5] <- paste(rrr[1:3,5],ppp[1:3,5])
#eee[2,5] <- paste(rrr[2,5],ppp[2,5])
#eee[3,5] <- paste(rrr[3,5],ppp[3,5])
eee[1:4,6] <- paste(rrr[1:4,6],ppp[1:4,6])
ext <- eee %>% as_data_frame() %>% mutate(rowname=colnames(.)[-1]) #%>% #as.matrix()
#dplyr::rename("subtype"=rowname)
return(ext)
}
aa <- cor.ext(a) %>% #dplyr::rename("all"=rowname) %>%
column_to_rownames() %>% as.matrix() #%>% knitr::kable(format = "latex")
bb <- cor.ext(b) %>% #dplyr::rename("asympt"=rowname) %>%
column_to_rownames() %>% as.matrix()
cc <- cor.ext(c) %>% #dplyr::rename("sympt"=rowname) %>%
column_to_rownames() %>% as.matrix()
readr::write_rds(aa,"data/cor_all.rds")
readr::write_rds(bb,"data/cor_asymp.rds")
readr::write_rds(cc,"data/cor_sympt.rds")
aa
IgG IgG1 IgG2 IgG3 IgG4
IgG "" "0.53 (***)" "0.45 (**)" "0.54 (***)" "-0.12 (ns)"
IgG1 "" "" "0.24 (ns)" "0.47 (**)" " 0.16 (ns)"
IgG2 "" "" "" "0.69 (***)" " 0.25 (ns)"
IgG3 "" "" "" "" " 0.38 (*)"
IgG4 "" "" "" "" ""
bb
IgG IgG1 IgG2 IgG3 IgG4
IgG "" "0.548 (**)" "0.307 (ns)" "0.618 (**)" "-0.073 (ns)"
IgG1 "" "" "0.062 (ns)" "0.643 (***)" " 0.336 (ns)"
IgG2 "" "" "" "0.413 (*)" " 0.103 (ns)"
IgG3 "" "" "" "" " 0.277 (ns)"
IgG4 "" "" "" "" ""
cc
IgG IgG1 IgG2 IgG3 IgG4
IgG "" "0.52 (*)" "0.77 (***)" "0.68 (***)" "0.33 (ns)"
IgG1 "" "" "0.34 (ns)" "0.36 (ns)" "0.22 (ns)"
IgG2 "" "" "" "0.90 (***)" "0.57 (**)"
IgG3 "" "" "" "" "0.50 (*)"
IgG4 "" "" "" "" ""
#knitr::kable(cc,"latex")
ax <- t(as.matrix(c("","","","","")))
rownames(ax) <- "All individuals"
colnames(ax) <- c("IgG","IgG1","IgG2","IgG3","IgG4")
bx <- t(as.matrix(c("","","","","")))
rownames(bx) <- "Asymptomatics"
colnames(bx) <- c("IgG","IgG1","IgG2","IgG3","IgG4")
cx <- t(as.matrix(c("","","","","")))
rownames(cx) <- "Symptomatics"
colnames(cx) <- c("IgG","IgG1","IgG2","IgG3","IgG4")
dd <- rbind(ax,aa,bx,bb,cx,cc)[-c(6,12,18),-1]
readr::write_rds(dd,"data/cor_full.rds")
#mco %>%
# dplyr::select(pheno,igg,Ab.units,par) %>%
# GGally::ggpairs(mapping = aes(color = pheno))
union(u,v) %>%
dplyr::rename("Status"=pheno) %>%
GGally::ggscatmat(color = "Status",corMethod = "spearman") +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(title="Correlation between IgG subtypes per phenotype")# +scale_fill_grey(start = 0, end = .9)
# correlación general y estratificada
#union(u,v) %>%
# GGally::ggpairs(mapping = aes(color = pheno))
mco %>%
ggplot(aes(Ab.units,fill=pheno)) +
geom_density(position = "identity",alpha=0.6) +
#geom_rug() +
scale_x_log10()
mco %>% #summary(mco$par)
ggplot(aes(par,fill=pheno)) +
geom_density(position = "identity",alpha=0.6) +
#geom_rug() +
scale_x_log10()
mco %>%
ggplot(aes(Ab.units,par,fill=pheno)) +
geom_point(aes(colour=pheno)) +
scale_x_log10() +
scale_y_log10() +
facet_grid(pheno~igg,scales = "free") +
geom_smooth(aes(colour=pheno),method = lm)
u <- mco %>% filter(pheno=="asymptomatic") %>%
dplyr::select(ID,igg,Ab.units,par) %>%
#mutate(Ab.units=log10(Ab.units)) %>%
spread(igg,Ab.units) %>%
dplyr::select(-ID)
v <- mco %>% filter(pheno=="symptomatic") %>%
dplyr::select(ID,igg,Ab.units,par) %>%
#mutate(Ab.units=log10(Ab.units)) %>%
spread(igg,Ab.units) %>%
dplyr::select(-ID)
#union(u,v) %>%
# PerformanceAnalytics::chart.Correlation(histogram = FALSE,method = "spearman")
mco %>%
ggplot(aes(Ab.units,par)) +
geom_point(aes(colour=pheno)) +
scale_x_log10() +
scale_y_log10() +
facet_grid(pheno~igg
#,scales = "free"
) +
geom_smooth(method = lm)
f <- mco %>% mutate(igg=as.factor(igg)) %>% .$igg
g <- mco %>% mutate(pheno=as.factor(pheno)) %>% .$pheno
#mco <- mco %>%
# #dplyr::select(Ab.units,par) %>%
# mutate(Ab.units=log10(Ab.units),par=log10(par)) %>%
# filter(par!=-Inf) #%>%
stt <- data_frame(estimate=as.double(),
##estimate1=as.double(),
##estimate2=as.double(),
statistic=as.double(),
p.value=as.double(),
parameter=as.double(),#
conf.low=as.double(),#
conf.high=as.double(),#
method=as.factor(NULL),
alternative=as.factor(NULL),
igg=as.character(),
pheno=as.character(),
signf=as.character(),
rho=as.character(),
lab=as.character())
for (l in 1:length(levels(f))) {
for (k in 1:length(levels(g))) {
## Primero,
j <- mco %>% filter(par!=0) %>% filter(igg==levels(f)[l] & pheno==levels(g)[k]) %>% #l #k
#t.test(Ab.units_log ~ pheno, data = ., var.equal= i$p.value>0.05) %>%
#wilcox.test(Ab.units_log ~ pheno, data = .) %>%#,
#conf.int=TRUE) %>%
cor.test(~ log10(Ab.units) + log10(par),
data = ., method = "pearson") %>%
broom::tidy() %>% #%>% format(digits=2)
#dplyr::select(-starts_with("estimate")) %>%
mutate(igg=levels(f)[l],#l
pheno=levels(g)[k],#k
signf=if_else(p.value<0.001,"ooo",
if_else(p.value<0.01,"oo",
if_else(p.value<0.05,"o","ns"#""#
))),
rho= estimate %>% format(digits=2),
lab= paste0(#"t=",#should be done -> mix variance equality test
#"W=",#non-parametric used in literature
#"S=",f$statistic,", rho="
"rho=",estimate %>% format(digits=2),
"\nP",ifelse(p.value<0.001,"<0.001",
paste0("=",p.value %>% format(digits=2)))
)
)
#i <- mco %>% filter(igg==levels(f)[l] & pheno==levels(g)[k]) %>% #l #k
# #t.test(Ab.units_log ~ pheno, data = ., var.equal= i$p.value>0.05) %>%
# #wilcox.test(Ab.units_log ~ pheno, data = .) %>%#,
# #conf.int=TRUE) %>%
# cor.test(~ Ab.units + par,
# data = ., method = "spearman") %>%
# broom::tidy() %>% #%>% format(digits=2)
# #dplyr::select(-starts_with("estimate")) %>%
# mutate(igg=levels(f)[l],#l
# pheno=levels(g)[k],#k
# signf=if_else(p.value<0.001,"(***)",
# if_else(p.value<0.01,"(**)",
# if_else(p.value<0.05,"(*)","(ns)"))),
# rho= estimate %>% format(digits=2),
# lab= paste0(#"t=",#should be done -> mix variance equality test
# #"W=",#non-parametric used in literature
# #"S=",f$statistic,", rho="
# "rho=",estimate %>% format(digits=2)#,
# #"\nP",ifelse(p.value<0.001,"<0.001",
# # paste0("=",p.value %>% format(digits=2)))
# )
# )
stt <- stt %>% union(j) #%>% union(i)
}
}
sttt <- stt %>% arrange(igg,pheno) %>% dplyr::select(igg,pheno,everything()) %>%
mutate(x=if_else(igg=="igg1"|igg=="igg3"|igg=="igg4",10,
if_else(igg=="igg2",.7,.1)),
y=60000)
# plot
specie_name <- c("asymptomatic"="Asymptomatic","symptomatic"="Symptomatic",
"igg"="IgG","igg1"="IgG1","igg2"="IgG2","igg3"="IgG3","igg4"="IgG4"
)
mco %>%
filter(par!=0) %>%
ggplot(aes(Ab.units,par)) +
geom_point(#aes(colour=pheno)
) +
scale_x_log10() +
scale_y_log10(limits=c(100,65000)
) +
facet_grid(pheno~igg,labeller = as_labeller(specie_name)
,scales = "free_x"
) +
#geom_smooth(method = lm,se=F,col="grey50",lwd=.5) +
#geom_smooth(method = loess,se=F,col="grey50",lwd=.5) +
geom_text(aes(x,y,label=paste0("r","==",
rho,"^",signf
)),data = sttt, parse = T,
vjust=1,hjust=0.05,size=3.5) +
#labs(#title="Correlation between AU and parasitemia per IgG subtype and phenotype",
# caption="oo: P < 0.01; o: P < 0.05; ns: non-significant"
# ) +
xlab("Antibody units (log scale)") +
ylab(expression(paste("Parasite/",mu,"L"," (log scale)"))) #+ #ylab("Parasite/uL") +
#coord_cartesian(ylim = c(0,20000))
#scale_color_grey(#start = 0, end = .9,
# name="Carrier",labels=c("Asymptomatic","Symptomatic"#,"Indeterminate"
# ))
d <- data.frame(x=1:3,y=1:3)
qplot(x, y, data=d) + geom_text(aes(2, 2.5,
label="rho~and~some~other~text"), parse=TRUE)
f <- mco %>% mutate(igg=as.factor(igg)) %>% .$igg
#mco <- mco %>%
# #dplyr::select(Ab.units,par) %>%
# mutate(Ab.units=log10(Ab.units),par=log10(par)) %>%
# filter(par!=-Inf) #%>%
stt <- data_frame(estimate=as.double(),
##estimate1=as.double(),
##estimate2=as.double(),
statistic=as.double(),
p.value=as.double(),
#parameter=as.double(),#
#conf.low=as.double(),#
#conf.high=as.double(),#
method=as.factor(NULL),
alternative=as.factor(NULL),
igg=as.character(),
lab=as.character())
for (l in 1:length(levels(f))) {
## Primero,
j <- mco %>% filter(igg==levels(f)[l]) %>% #l
#t.test(Ab.units_log ~ pheno, data = ., var.equal= i$p.value>0.05) %>%
#wilcox.test(Ab.units_log ~ pheno, data = .) %>%#,
#conf.int=TRUE) %>%
cor.test(~ Ab.units + par,
data = ., method = "spearman") %>%
broom::tidy() %>% #%>% format(digits=2)
#dplyr::select(-starts_with("estimate")) %>%
mutate(igg=levels(f)[l],#l
lab= paste0(#"t=",#should be done -> mix variance equality test
#"W=",#non-parametric used in literature
#"S=",f$statistic,", rho="
"rho=",estimate %>% format(digits=2),
"\nP",ifelse(p.value<0.001,"<0.001",
paste0("=",p.value %>% format(digits=2)))
)
)
stt <- stt %>% union(j)
}
stt <- stt %>% arrange(igg) %>% dplyr::select(igg,everything()) %>%
mutate(x=.1,
y=200)
# plot
mco %>% #filter(igg==levels(f)[1]) %>%
ggplot(aes(Ab.units,par)) +
geom_point() +
scale_x_log10() +
scale_y_log10() +
geom_smooth(method = lm,se=F,col="grey50",lwd=.5) +
#geom_smooth(method = loess,se=F,col="grey50",lwd=.5) +
geom_text(aes(x,y,label=lab),data = stt,
vjust=.5,hjust=0.05,size=3.5) +
facet_grid(~igg
#,scales = "free"
) +
#facet_wrap(~igg,ncol = 5)
labs(title="Correlation between AU and parasitemia per IgG subtype")
age <- mco %>%
dplyr::select(ID, code, pheno, igg, Ab.units) %>%
full_join(mcv,by=c("ID","pheno"))
age %>% #summary(age$edad)
ggplot(aes(Ab.units,fill=pheno)) +
geom_density(position = "identity",alpha=0.7) +
facet_wrap(~igg,scales = "free",ncol = 5) #+scale_x_log10()
age %>% #summary(age$edad)
ggplot(aes(edad,fill=pheno)) +
geom_density(position = "identity",alpha=0.7)
f <- age %>% mutate(igg=as.factor(igg)) %>% .$igg
g <- age %>% mutate(pheno=as.factor(pheno)) %>% .$pheno
#mco <- mco %>%
# #dplyr::select(Ab.units,par) %>%
# mutate(Ab.units=log10(Ab.units),par=log10(par)) %>%
# filter(par!=-Inf) #%>%
stt <- data_frame(estimate=as.double(),
##estimate1=as.double(),
##estimate2=as.double(),
statistic=as.double(),
p.value=as.double(),
#parameter=as.double(),#
#conf.low=as.double(),#
#conf.high=as.double(),#
method=as.factor(NULL),
alternative=as.factor(NULL),
igg=as.character(),
pheno=as.character(),
signf=as.character(),
rho=as.character(),
lab=as.character())
for (l in 1:length(levels(f))) {
for (k in 1:length(levels(g))) {
## Primero,
j <- age %>% filter(igg==levels(f)[l] & pheno==levels(g)[k]) %>% #l #k
#t.test(Ab.units_log ~ pheno, data = ., var.equal= i$p.value>0.05) %>%
#wilcox.test(Ab.units_log ~ pheno, data = .) %>%#,
#conf.int=TRUE) %>%
cor.test(~ Ab.units + edad,
data = ., method = "spearman") %>%
broom::tidy() %>% #%>% format(digits=2)
#dplyr::select(-starts_with("estimate")) %>%
mutate(igg=levels(f)[l],#l
pheno=levels(g)[k],#k
signf=if_else(p.value<0.001,"ooo",
if_else(p.value<0.01,"oo",
if_else(p.value<0.05,"o","ns"#""#
))),
rho= estimate %>% format(digits=2),
lab= paste0(#"t=",#should be done -> mix variance equality test
#"W=",#non-parametric used in literature
#"S=",f$statistic,", rho="
"rho=",estimate %>% format(digits=2),
"\nP",ifelse(p.value<0.001,"<0.001",
paste0("=",p.value %>% format(digits=2)))
)
)
#i <- age %>% filter(igg==levels(f)[l] & pheno==levels(g)[k]) %>% #l #k
# #t.test(Ab.units_log ~ pheno, data = ., var.equal= i$p.value>0.05) %>%
# #wilcox.test(Ab.units_log ~ pheno, data = .) %>%#,
# #conf.int=TRUE) %>%
# cor.test(~ Ab.units + edad,
# data = ., method = "spearman") %>%
# broom::tidy() %>% #%>% format(digits=2)
# #dplyr::select(-starts_with("estimate")) %>%
# mutate(igg=levels(f)[l],#l
# pheno=levels(g)[k],#k
# lab= paste0(#"t=",#should be done -> mix variance equality test
# #"W=",#non-parametric used in literature
# #"S=",f$statistic,", rho="
# "rho=",estimate %>% format(digits=2),
# "\nP",ifelse(p.value<0.001,"<0.001",
# paste0("=",p.value %>% format(digits=2)))
# )
# )
stt <- stt %>% union(j) #%>% union(i)
}
}
sttu <- stt %>% arrange(igg,pheno) %>% dplyr::select(igg,pheno,everything()) %>%
mutate(x=.1,
y=78)
# plot
specie_name <- c("asymptomatic"="Asymptomatic","symptomatic"="Symptomatic",
"igg"="IgG","igg1"="IgG1","igg2"="IgG2","igg3"="IgG3","igg4"="IgG4"
)
age %>%
ggplot(aes(Ab.units,edad)) +
geom_point(#aes(colour=pheno)
) +
#scale_x_log10() +
#scale_y_log10() +
facet_grid(pheno~igg,labeller = as_labeller(specie_name)
,scales = "free_x"
) +
#geom_smooth(method = lm,se=F,col="grey50",lwd=.5) +
#geom_smooth(method = loess,se=F,col="grey50",lwd=.5) +
geom_text(aes(x,y,label=paste0("rho","==",
rho,"^",signf
)),data = sttu, parse = T,
vjust=.5,hjust=0.05,size=3.5) +
#labs(title="Correlation between AU and parasitemia per IgG subtype and phenotype") +
xlab("Antibody units") + ylab("Age (years)") +
coord_cartesian(ylim = c(0,85))
#scale_color_grey(#start = 0, end = .9,
# name="Carrier",labels=c("Asymptomatic","Symptomatic"#,"Indeterminate"
# ))
f <- age %>% mutate(igg=as.factor(igg)) %>% .$igg
#mco <- mco %>%
# #dplyr::select(Ab.units,par) %>%
# mutate(Ab.units=log10(Ab.units),par=log10(par)) %>%
# filter(par!=-Inf) #%>%
stt <- data_frame(estimate=as.double(),
##estimate1=as.double(),
##estimate2=as.double(),
statistic=as.double(),
p.value=as.double(),
#parameter=as.double(),#
#conf.low=as.double(),#
#conf.high=as.double(),#
method=as.factor(NULL),
alternative=as.factor(NULL),
igg=as.character(),
lab=as.character())
for (l in 1:length(levels(f))) {
## Primero,
j <- age %>% filter(igg==levels(f)[l]) %>% #l
#t.test(Ab.units_log ~ pheno, data = ., var.equal= i$p.value>0.05) %>%
#wilcox.test(Ab.units_log ~ pheno, data = .) %>%#,
#conf.int=TRUE) %>%
cor.test(~ Ab.units + edad,
data = ., method = "spearman") %>%
broom::tidy() %>% #%>% format(digits=2)
#dplyr::select(-starts_with("estimate")) %>%
mutate(igg=levels(f)[l],#l
lab= paste0(#"t=",#should be done -> mix variance equality test
#"W=",#non-parametric used in literature
#"S=",f$statistic,", rho="
"rho=",estimate %>% format(digits=2),
"\nP",ifelse(p.value<0.001,"<0.001",
paste0("=",p.value %>% format(digits=2)))
)
)
stt <- stt %>% union(j)
}
stt <- stt %>% arrange(igg) %>% dplyr::select(igg,everything()) %>%
mutate(x=.1,
y=55)
# plot
age %>% #filter(igg==levels(f)[1]) %>%
ggplot(aes(Ab.units,edad)) +
geom_point() +
scale_x_log10() +
#scale_y_log10() +
geom_smooth(method = lm,se=F,col="grey50",lwd=.5) +
#geom_smooth(method = loess,se=F,col="grey50",lwd=.5) +
geom_text(aes(x,y,label=lab),data = stt,
vjust=.5,hjust=0.05,size=3.5) +
facet_grid(~igg
#,scales = "free"
) +
#facet_wrap(~igg,ncol = 5)
labs(title="Correlation between AU and parasitemia per IgG subtype")
case=20
ctrl=24
m_c=min(case,ctrl) #limiting sample size
p=5 #current number of predictors
#round(m_c/15) #
p<m_c/15
[1] FALSE
k=5 #number of covariates
p= 0.4 # prevalence of outcome
n_x=10*k/p
# exposition: symptomatic malaria (60% asympt in Pf)
#
# given the proportion (prevalence) relevant OR and required power,
# obtain the sample size required
p=0.40 #proportion of exposure in general population OR outcome provalence
#(OR=pA*(1-pB)/pB/(1-pA)) # 2
OR=.5 #hypothetical OR
#
kappa=1 # sampling ratio between case:control
alpha=0.05 #type 1 error (false positives)
beta=0.20 #power=1-beta #type 2 error (false negatives)
#(n= (((1+kappa)^2)*((qnorm(1-alpha/2)+qnorm(1-beta))^2)) / (kappa*p*(1-p)*((log(OR))^2)) )
n1= (4*((qnorm(1-alpha/2)+qnorm(1-beta))^2)) / (p*(1-p)*((log(OR))^2))
#ceiling(n) # Afridi 220, Bragga 263, Stanisic 206, Medeiros 28+24
# given the available sample size and obtained Odds Ratio,
# obtain the actual power
n2= 44
#(OR=.5)
z=log(OR)*sqrt(n2)/sqrt(((1+kappa)^2)/(kappa*p*(1-p)))
Power=pnorm(z-qnorm(1-alpha/2))+pnorm(-z-qnorm(1-alpha/2))
#library(tidyverse)
mmo <- mco %>% #filter(igg=="igg") %>%
mutate(pheno=as.factor(pheno)) %>%
mutate(pheno=forcats::fct_relevel(pheno,"symptomatic")) %>%
mutate(pheno.num=ifelse(pheno=="asymptomatic",0,1)) %>%
mutate(pheno.log=ifelse(pheno=="asymptomatic",FALSE,TRUE))
2235 are for the asymptomatic state!cova %>%
covb %>%
covx %>%
filter(codigo=="2235" | codigo=="3053" | codigo=="9165" | codigo=="9801") %>%
arrange(codigo) #%>%
#dplyr::select(codigo,edad,sexo,par,everything())
## add covariates
full_join(mmo,mcv,by=c("ID","par","pheno")) %>%
filter(ID=="2235" | ID=="3053" | ID=="9165" | ID=="9801") %>%
arrange(ID)
mcv_t <- mcv %>% #filter(igg=="igg") %>%
mutate(pheno=as.factor(pheno)) %>%
mutate(pheno=forcats::fct_relevel(pheno,"symptomatic")) %>%
mutate(pheno.num=ifelse(pheno=="asymptomatic",0,1)) %>%
mutate(pheno.log=ifelse(pheno=="asymptomatic",FALSE,TRUE))
### MULTIPLE LOGISTIC REGRESSION
mmo_x <- mmo %>%
dplyr::select(ID,
#code,
#pheno,
igg
,Ab.units
#,Ab.units_log
,pheno.num#,pheno.log # 1: symptomatic, 0:: asymptomatic
) %>%
spread(igg
,Ab.units
#,Ab.units_log
)
#mmo_x.omit = na.omit(mmo_x)
mlr <- mcv_t %>%
dplyr::select(ID,pheno.num, edad, sexo, par) %>%
inner_join(mmo_x,by=c("ID","pheno.num"))
mlr_na = na.omit(mlr)
#dd <- rms::datadist(mlr_na); options(datadist='dd')
# https://stackoverflow.com/questions/7505547/detach-all-packages-while-working-in-r
detachAllPackages <- function() {
basic.packages <- c("package:stats",
"package:graphics",
"package:grDevices",
"package:utils",
"package:datasets",
"package:methods",
"package:base")
package.list <- search()[
ifelse(
unlist(
gregexpr("package:",search())
)==1,TRUE,FALSE
)
]
package.list <- setdiff(package.list,basic.packages)
if (length(package.list)>0)
for (package in package.list)
detach(package, character.only=TRUE)
}
detachAllPackages()
# EXPLAINED: https://stats.stackexchange.com/questions/64788/interpreting-a-logistic-regression-model-with-multiple-predictors
library(tidyverse)
mmo_x_rms <- mlr_na %>%
mutate(igg_c=ntile(igg,3),
igg1_c=ntile(igg1,3),
igg2_c=ntile(igg2,3),
igg3_c=ntile(igg3,3),
igg4_c=ntile(igg4,3)
) %>%
mutate(igg_c=as.factor(igg_c),
igg1_c=as.factor(igg1_c),
igg2_c=as.factor(igg2_c),
igg3_c=as.factor(igg3_c),
igg4_c=as.factor(igg4_c)) %>%
mutate(igg_d=igg/10,
igg1_d=igg1/10,
igg2_d=igg2/10,
igg3_d=igg3/10,
igg4_d=igg4/10) %>%
mutate(igg_l=log10(igg),
igg1_l=log10(igg1),
igg2_l=log10(igg2),
igg3_l=log10(igg3),
igg4_l=log10(igg4)) #%>%
#dplyr::select(ID,pheno.num,
# igg=igg_c,
# igg1=igg1_c,
# igg2=igg2_c,
# igg3=igg3_c,
# igg4=igg4_c
# )
dd <- rms::datadist(mmo_x_rms); options(datadist='dd')
mmo %>% dplyr::count(igg)
1. Miura K, Orcutt AC, Muratova OV, Miller LH, Saul A, Long CA. Development and characterization of a standardized ELISA including a reference serum on each plate to detect antibodies induced by experimental malaria vaccines. Vaccine. 2008;26(2):193-200. doi:10.1016/j.vaccine.2007.10.064.
2. Hughes S. Plater: Read, Tidy, and Display Data from Microtiter Plates.; 2016. https://CRAN.R-project.org/package=plater.
3. Wickham H. Readxl: Read Excel Files.; 2016. https://CRAN.R-project.org/package=readxl.
4. Garmonsway D. Tidyxl: Read Untidy Excel Files. https://github.com/nacnudus/tidyxl.
5. Ritz C, Baty F, Streibig JC, Gerhard D. Dose-response analysis using r. Xia Y, ed. PLOS ONE. 2015;10(12):e0146021. doi:10.1371/journal.pone.0146021.
6. Sveidqvist K, Bostock M, Pettitt C, Daines M, Kashcha A, Iannone R. DiagrammeR: Create Graph Diagrams and Flowcharts Using R.; 2016. https://CRAN.R-project.org/package=DiagrammeR.